Code
tomap <- fish %>%
group_by(protection, reef) %>%
summarise(latitude = mean(latitude), longitude = mean(longitude)) %>%
st_as_sf(., coords = c("longitude", "latitude"), crs = 4326)
mapview::mapview(tomap, zcol = "protection")First we provide an interactive map of the sites location by protection level to have a better view of Figure 2 in main manuscript.
tomap <- fish %>%
group_by(protection, reef) %>%
summarise(latitude = mean(latitude), longitude = mean(longitude)) %>%
st_as_sf(., coords = c("longitude", "latitude"), crs = 4326)
mapview::mapview(tomap, zcol = "protection")Biomass increased in Cabo Pulmo since the 1998 data baseline. Thus, it provides a good study case to understand how the NRSI works. First, we extract only sites that were monitored in 1998 and 1999 and in the following years from 2009 onward.
cp_sites <- fish %>%
filter(region == "Cabo Pulmo") %>%
filter(year < 2001) %>%
pull(reef) %>% unique()Then we compare those sites with others from the La Paz region, which are under a Lighly Protected area.
more_sites <- c(cp_sites, "ESPIRITU_SANTO_ISLOTES_NORTE", "ESPIRITU_SANTO_BALLENA", "ESPIRITU_SANTO_PUNTA_LOBOS")
library(Hmisc)
# Calculate the mean biomass and standard error
summary_data <- fish %>%
filter(reef %in% more_sites) %>%
group_by(year, region, reef, depth2, transect) %>%
summarise(biomass = sum(biomass)) %>%
group_by(year, region, reef) %>%
summarise(
biomass_mean = mean(biomass),
biomass_se = sd(biomass) / sqrt(n()) # Standard error
)
# Plot with error bars
ggplot(summary_data, aes(x=year, y=biomass_mean, col=reef, group=reef)) +
geom_point() +
geom_errorbar(aes(ymin=biomass_mean - biomass_se, ymax=biomass_mean + biomass_se), width=0.2) +
facet_grid(~region) +
labs(y = "Mean Biomass", title = "Mean Biomass with Standard Error")Cabo Pulmo sites, which have shown significant recovery in biomass, exhibit greater variability. This is due to the presence of larger fish schools that create substantial variation within transects.
Why are not all sites in Cabo Pulmo recovering to similarly high levels? The issue lies in the scale at which we assess regional health. The location of surveys, such as more or less exposed areas, becomes crucial. This variability is important to consider. Consequently, biomass distributions are skewed to the left, and recovery is reflected in the right tail of the biomass distribution.
Let’s now see how the index works in Cabo Pulmo and La Paz.
# Calculate the mean biomass and standard error
nrsi <- fish %>%
filter(reef %in% more_sites) %>%
mutate(index_levels = case_when(
str_detect(trophic_level_f, "4-4.5") ~ "UTL",
str_detect(trophic_level_f, "2-2.5") ~ "LTL",
TRUE ~ "CTL"
),
protection = factor(protection, levels = c("Not Protected", "Lightly Protected", "Fish Refuge", "Fully Protected"))) %>%
mutate(degree = round(latitude)) %>%
group_by(year, reef, region, degree, latitude, longitude, depth2, index_levels, transect) %>%
summarise(biomass = sum(biomass, na.rm = TRUE)) %>%
group_by(year, reef, region, index_levels, degree, latitude, longitude) %>%
summarise(biomass = mean(biomass, na.rm = TRUE)) %>%
group_by(year, reef, region, degree, latitude, longitude) %>%
mutate(rel_biomass = (biomass / sum(biomass, na.rm = TRUE)) * 100) %>%
select(-biomass) %>%
pivot_wider(names_from = index_levels, values_from = rel_biomass) %>%
mutate(
UTL = if_else(is.na(UTL), 0, UTL), # Replace NA with 0 for UTL calculations
LTL = if_else(is.na(LTL), 0, LTL), # Replace NA with 0 for LTL calculations
CTL = if_else(is.na(CTL), 0, CTL), # Replace NA with 0 for CTL calculations
nrsi = case_when(
LTL > UTL + CTL ~ UTL / (UTL + CTL), # Condition 1: Use only UTL if LTL is greater than UTL + CTL
TRUE ~ (UTL + LTL - CTL) / (UTL + LTL + CTL) # Condition 2: Compute as normal otherwise
)
)
nrsi %>%
group_by(year, region) %>%
summarise(nrsi = mean(nrsi)) %>%
ggplot(aes(x=year, y=nrsi)) +
geom_point() +
facet_grid(~region) If we want to compare these two regions, we can use the bootstrapping technique by resampling the averages NRSI values. This way we obtain the following graph:
resample_mean <- function(df, n = 9999) {
replicate(n, {
sampled <- df[sample(nrow(df), replace = TRUE), ]
mean((sampled$nrsi), na.rm = TRUE)
})
}
resampled_prod <- nrsi %>%
group_by(region) %>%
nest() %>%
mutate(
ResampledMeans = map(data, ~resample_mean(.x)),
Median = map_dbl(ResampledMeans, median)
) %>%
select(region, ResampledMeans, Median)
toplot <- resampled_prod %>%
unnest(ResampledMeans)
ggplot(toplot, aes(x = ResampledMeans, fill = region)) +
geom_density(binwidth = 0.1, alpha = 0.7, position = 'identity') +
geom_vline(data = resampled_prod, aes(xintercept = Median, color = region), linetype = "dashed", size = 1) +
labs(title = "Distribution of Resampled Means by Protection Group",
x = "Resampled Means",
y = "Frequency")Or by year:
resample_mean <- function(df, n = 9999) {
replicate(n, {
sampled <- df[sample(nrow(df), replace = TRUE), ]
mean((sampled$nrsi), na.rm = TRUE)
})
}
resampled_prod <- nrsi %>%
group_by(year, region) %>%
nest() %>%
mutate(
ResampledMeans = map(data, ~resample_mean(.x)),
Median = map_dbl(ResampledMeans, median)
) %>%
select(region, ResampledMeans, Median)
toplot <- resampled_prod %>%
unnest(ResampledMeans)
ggplot(toplot, aes(y = factor(year), x = Median, fill = stat(x))) +
geom_density_ridges_gradient(scale = 1) +
geom_vline(xintercept = 0, linetype = 2) +
labs(x = "NRSI", y = "") +
scale_fill_gradientn(colors = c("firebrick", "orange", "gray", "green", "darkgreen"),
values = scales::rescale(c(-1, -0.75, -0.5, 0.5, 0.75, 1)))+
xlim(-0.8,0.8) +
facet_grid(~region) +
theme_bw() +
theme(legend.position = "",
axis.text.x = element_text(angle = 90, vjust = .5)) We can also explore within the Cabo Pulmo area, sites that are inside vs the ones that are outside the protection polygon:
nrsi <- fish %>%
mutate(index_levels = case_when(
str_detect(trophic_level_f, "4-4.5") ~ "UTL",
str_detect(trophic_level_f, "2-2.5") ~ "LTL",
TRUE ~ "CTL"
),
protection = factor(protection, levels = c("Not Protected", "Lightly Protected", "Fish Refuge", "Fully Protected"))) %>%
mutate(degree = round(latitude)) %>%
filter(region == "Cabo Pulmo") %>%
group_by(year, reef, protection, degree, latitude, longitude, depth2, index_levels, transect) %>%
summarise(biomass = sum(biomass, na.rm = TRUE)) %>%
group_by(year, reef, protection, index_levels, degree, latitude, longitude) %>%
summarise(biomass = mean(biomass, na.rm = TRUE)) %>%
group_by(year, reef, protection, degree, latitude, longitude) %>%
mutate(rel_biomass = (biomass / sum(biomass, na.rm = TRUE)) * 100) %>%
select(-biomass) %>%
pivot_wider(names_from = index_levels, values_from = rel_biomass) %>%
mutate(
UTL = if_else(is.na(UTL), 0, UTL), # Replace NA with 0 for UTL calculations
LTL = if_else(is.na(LTL), 0, LTL), # Replace NA with 0 for LTL calculations
CTL = if_else(is.na(CTL), 0, CTL), # Replace NA with 0 for CTL calculations
nrsi = case_when(
LTL > UTL + CTL ~ UTL / (UTL + CTL), # Condition 1: Use only UTL if LTL is greater than UTL + CTL
TRUE ~ (UTL + LTL - CTL) / (UTL + LTL + CTL) # Condition 2: Compute as normal otherwise
)
)
nrsi %>%
select(reef, protection) %>%
unique()# A tibble: 247 × 6
# Groups: year, reef, protection, degree, latitude, longitude [247]
year degree latitude longitude reef protection
<dbl> <dbl> <dbl> <dbl> <chr> <fct>
1 1999 23 23.5 -109. BAJO_CABO_PULMO Fully Protected
2 1999 23 23.4 -109. CANTILES_CABO_PULMO Fully Protected
3 2000 23 23.4 -109. CANTIL_MEDIO Fully Protected
4 2000 23 23.5 -109. MORROS_CABO_PULMO Fully Protected
5 2009 23 23.5 -109. BAJO_CABO_PULMO Fully Protected
6 2009 23 23.4 -109. BARRA_PRIMERA Fully Protected
7 2009 23 23.4 -109. BLEDITO Not Protected
8 2009 23 23.4 -109. CANTILES_CABO_PULMO Fully Protected
9 2009 23 23.4 -109. CANTIL_MEDIO Fully Protected
10 2009 23 23.4 -109. CASITAS Fully Protected
# ℹ 237 more rows
resample_mean <- function(df, n = 9999) {
replicate(n, {
sampled <- df[sample(nrow(df), replace = TRUE), ]
mean((sampled$nrsi), na.rm = TRUE)
})
}
resampled_prod <- nrsi %>%
filter(year > 2015) %>%
group_by(protection) %>%
nest() %>%
mutate(
ResampledMeans = map(data, ~resample_mean(.x)),
Median = map_dbl(ResampledMeans, median)
) %>%
select(protection, ResampledMeans, Median)
toplot <- resampled_prod %>%
unnest(ResampledMeans)
p1 <- ggplot(toplot, aes(y = protection, x = Median, fill = stat(x))) +
geom_density_ridges_gradient(scale = 1) +
geom_vline(xintercept = 0, linetype = 2) +
labs(x = "NRSI", y = "", title = "Cabo Pulmo Region") +
scale_fill_gradientn(colors = c("firebrick", "orange", "gray", "green", "darkgreen"),
values = scales::rescale(c(-1, -0.75, -0.5, 0.5, 0.75, 1)))+
xlim(-0.8,0.8) +
#facet_grid(~protection) +
theme_bw() +
theme(legend.position = "",
axis.text.x = element_text(angle = 90, vjust = .5))
p2 <- fish %>%
mutate(protection = factor(protection, levels = c("Not Protected", "Lightly Protected", "Fish Refuge", "Fully Protected"))) %>%
filter(region == "Cabo Pulmo") %>%
select(reef, protection, latitude, longitude) %>%
unique() %>%
st_as_sf(., coords = c("longitude", "latitude"), crs = 4326) %>%
ggplot() +
geom_sf(data = dafishr::mx_coastline, fill = NA) +
geom_sf(data = dafishr::all_mpas %>% filter(NOMBRE == "Cabo Pulmo"), fill = "gray90", alpha = .4) +
geom_sf(aes(col=protection)) +
coord_sf(ylim = c(23.3, 23.55), xlim = c(-109.5, -109.3)) +
labs(col = "") +
theme_bw() +
theme(legend.position = "top")
library(patchwork)
p2 + p1 As there is a different number of reefs between inside and outside the area, we can try random sampling 3 reefs within the park to compare with the 3 outside to see if sampling size actually influence this difference, and we can see that actually the sites have consistently higher NRSI values.
set.seed(123)
sampled_fully_protected_reefs <-
bind_rows(
nrsi %>%
ungroup() %>%
filter(protection == "Fully Protected") %>%
distinct(reef) %>%
sample_n(3) %>%
inner_join(nrsi, by = "reef"),
nrsi %>%
filter(protection == "Not Protected")
)
resampled_prod <- sampled_fully_protected_reefs %>%
group_by(protection) %>%
nest() %>%
mutate(
ResampledMeans = map(data, ~resample_mean(.x)),
Median = map_dbl(ResampledMeans, median)
) %>%
select(protection, ResampledMeans, Median)
toplot <- resampled_prod %>%
unnest(ResampledMeans)
p1 <- ggplot(toplot, aes(y = protection, x = Median, fill = stat(x))) +
geom_density_ridges_gradient(scale = 1) +
geom_vline(xintercept = 0, linetype = 2) +
labs(x = "NRSI", y = "", title = "Cabo Pulmo Region") +
scale_fill_gradientn(colors = c("firebrick", "orange", "gray", "green", "darkgreen"),
values = scales::rescale(c(-1, -0.75, -0.5, 0.5, 0.75, 1)))+
xlim(-0.8,0.8) +
#facet_grid(~protection) +
theme_bw() +
theme(legend.position = "",
axis.text.x = element_text(angle = 90, vjust = .5))
p2 <- sampled_fully_protected_reefs %>%
st_as_sf(., coords = c("longitude", "latitude"), crs = 4326) %>%
ggplot() +
geom_sf(data = dafishr::mx_coastline, fill = NA) +
geom_sf(data = dafishr::all_mpas %>% filter(NOMBRE == "Cabo Pulmo"), fill = "gray90", alpha = .4) +
geom_sf(aes(col=protection)) +
coord_sf(ylim = c(23.3, 23.55), xlim = c(-109.5, -109.3)) +
labs(col = "") +
theme_bw() +
theme(legend.position = "top")
p1+p2set.seed(111)
sampled_fully_protected_reefs <-
bind_rows(
nrsi %>%
ungroup() %>%
filter(protection == "Fully Protected") %>%
distinct(reef) %>%
sample_n(3) %>%
inner_join(nrsi, by = "reef"),
nrsi %>%
filter(protection == "Not Protected")
)
resampled_prod <- sampled_fully_protected_reefs %>%
group_by(protection) %>%
nest() %>%
mutate(
ResampledMeans = map(data, ~resample_mean(.x)),
Median = map_dbl(ResampledMeans, median)
) %>%
select(protection, ResampledMeans, Median)
toplot <- resampled_prod %>%
unnest(ResampledMeans)
p1 <- ggplot(toplot, aes(y = protection, x = Median, fill = stat(x))) +
geom_density_ridges_gradient(scale = 1) +
geom_vline(xintercept = 0, linetype = 2) +
labs(x = "NRSI", y = "", title = "Cabo Pulmo Region") +
scale_fill_gradientn(colors = c("firebrick", "orange", "gray", "green", "darkgreen"),
values = scales::rescale(c(-1, -0.75, -0.5, 0.5, 0.75, 1)))+
xlim(-0.8,0.8) +
#facet_grid(~protection) +
theme_bw() +
theme(legend.position = "",
axis.text.x = element_text(angle = 90, vjust = .5))
p2 <- sampled_fully_protected_reefs %>%
st_as_sf(., coords = c("longitude", "latitude"), crs = 4326) %>%
ggplot() +
geom_sf(data = dafishr::mx_coastline, fill = NA) +
geom_sf(data = dafishr::all_mpas %>% filter(NOMBRE == "Cabo Pulmo"), fill = "gray90", alpha = .4) +
geom_sf(aes(col=protection)) +
coord_sf(ylim = c(23.3, 23.55), xlim = c(-109.5, -109.3)) +
labs(col = "") +
theme_bw() +
theme(legend.position = "top")
p1+p2set.seed(341)
sampled_fully_protected_reefs <-
bind_rows(
nrsi %>%
ungroup() %>%
filter(protection == "Fully Protected") %>%
distinct(reef) %>%
sample_n(3) %>%
inner_join(nrsi, by = "reef"),
nrsi %>%
filter(protection == "Not Protected")
)
resampled_prod <- sampled_fully_protected_reefs %>%
group_by(protection) %>%
nest() %>%
mutate(
ResampledMeans = map(data, ~resample_mean(.x)),
Median = map_dbl(ResampledMeans, median)
) %>%
select(protection, ResampledMeans, Median)
toplot <- resampled_prod %>%
unnest(ResampledMeans)
p1 <- ggplot(toplot, aes(y = protection, x = Median, fill = stat(x))) +
geom_density_ridges_gradient(scale = 1) +
geom_vline(xintercept = 0, linetype = 2) +
labs(x = "NRSI", y = "", title = "Cabo Pulmo Region") +
scale_fill_gradientn(colors = c("firebrick", "orange", "gray", "green", "darkgreen"),
values = scales::rescale(c(-1, -0.75, -0.5, 0.5, 0.75, 1)))+
xlim(-0.8,0.8) +
#facet_grid(~protection) +
theme_bw() +
theme(legend.position = "",
axis.text.x = element_text(angle = 90, vjust = .5))
p2 <- sampled_fully_protected_reefs %>%
st_as_sf(., coords = c("longitude", "latitude"), crs = 4326) %>%
ggplot() +
geom_sf(data = dafishr::mx_coastline, fill = NA) +
geom_sf(data = dafishr::all_mpas %>% filter(NOMBRE == "Cabo Pulmo"), fill = "gray90", alpha = .4) +
geom_sf(aes(col=protection)) +
coord_sf(ylim = c(23.3, 23.55), xlim = c(-109.5, -109.3)) +
labs(col = "") +
theme_bw() +
theme(legend.position = "top")
p1+p2We inspect if NRSI is influenced by environmental conditions other than the protection level of the reefs.
#### ANALYSIS OF ENVIORNMENTAL ASSOCIATION WITH NRSI
fish <- read_csv("data/fish_database_submission.csv")
npp <- read_rds("data/LTEM_reefs_chl_yearly_average.RDS") %>%
select(reef = Reef, year, mean_chl) %>%
group_by(year, reef) %>%
summarise(mean_chl = mean(mean_chl))
sst <- read_rds("data/LTEM_reefs_SST_yearly_average.RDS") %>%
select(reef = Reef, year, mean_sst) %>%
group_by(year, reef) %>%
summarise(mean_sst = mean(mean_sst))
nrsi <- fish %>%
filter(year > 2009) %>%
mutate(index_levels = case_when(
str_detect(trophic_level_f, "4-4.5") ~ "UTL",
str_detect(trophic_level_f, "2-2.5") ~ "LTL",
TRUE ~ "CTL"
),
protection = factor(protection, levels = c("Not Protected", "Lightly Protected", "Fishing Refuge", "Fully Protected"))) %>%
mutate(degree = round(latitude)) %>%
group_by(year, region, reef, protection, degree, latitude, longitude, depth2, index_levels, transect) %>%
summarise(biomass = sum(biomass, na.rm = TRUE)) %>%
group_by(year, region, reef, protection, index_levels, degree, latitude, longitude) %>%
summarise(biomass = mean(biomass, na.rm = TRUE)) %>%
group_by(year, region, reef, protection, degree, latitude, longitude) %>%
mutate(rel_biomass = (biomass / sum(biomass, na.rm = TRUE)) * 100) %>%
select(-biomass) %>%
pivot_wider(names_from = index_levels, values_from = rel_biomass) %>%
mutate(
UTL = if_else(is.na(UTL), 0, UTL), # Replace NA with 0 for UTL calculations
LTL = if_else(is.na(LTL), 0, LTL), # Replace NA with 0 for LTL calculations
CTL = if_else(is.na(CTL), 0, CTL), # Replace NA with 0 for CTL calculations
nrsi = case_when(
LTL > UTL + CTL ~ UTL / (UTL + CTL), # Condition 1: Use only UTL if LTL is greater than UTL + CTL
TRUE ~ (UTL + LTL - CTL) / (UTL + LTL + CTL) # Condition 2: Compute as normal otherwise
)
) %>%
left_join(., npp, by = c("year", "reef")) %>%
left_join(., sst, by = c("year", "reef")) %>%
filter(!is.na(mean_chl))
nrsi_data <- nrsi %>%
mutate(protection = factor(protection)) First, we run a beta regression model using latitude, sea surface temperature, and chlorophyll a content, all obtained through remote sensing data using year averages.
# Load necessary packages
library(dplyr)
library(betareg)
# Prepare data
nrsi_data <- nrsi_data %>%
filter(!is.na(mean_sst)) %>% # Remove rows with NA in mean_sst
mutate(protection = as.factor(protection))
nrsi_data$transformed_index = (nrsi_data$nrsi + 1) / 2
nrsi_data <- nrsi_data %>% filter(transformed_index > 0)
# Fit beta regression model including environmental variables
beta_model <- betareg(transformed_index ~ protection +
latitude + mean_chl + mean_sst, data = nrsi_data)
# Summary of the model
summary(beta_model)
Call:
betareg(formula = transformed_index ~ protection + latitude + mean_chl +
mean_sst, data = nrsi_data)
Standardized weighted residuals 2:
Min 1Q Median 3Q Max
-5.3189 -0.5823 0.0503 0.6828 3.9777
Coefficients (mean model with logit link):
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.99522 1.46724 0.678 0.498
protectionNot Protected -0.06650 0.06426 -1.035 0.301
protectionFully Protected 0.68357 0.08304 8.232 < 2e-16 ***
protectionFishing Refuge 0.06356 0.09979 0.637 0.524
latitude -0.01960 0.02369 -0.827 0.408
mean_chl -0.36039 0.08826 -4.083 4.44e-05 ***
mean_sst -0.02758 0.03939 -0.700 0.484
Phi coefficients (precision model with identity link):
Estimate Std. Error z value Pr(>|z|)
(phi) 6.7000 0.3076 21.78 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Type of estimator: ML (maximum likelihood)
Log-likelihood: 296.6 on 8 Df
Pseudo R-squared: 0.1822
Number of iterations: 17 (BFGS) + 2 (Fisher scoring)
The protection levels (Fully Protected) is singificantly higher and we found no significant effects for environmental parameters except for chlorophyll.
Such relationship is interesting so we explore it further by plotting NRSI against chla.
nrsi_data %>%
ggplot(aes(x=nrsi, y=mean_chl, col = protection)) +
geom_point()It is evident that while a relationship seems to exist, it is because Fully Protected are much more common in oligotrophic condition. We can explore this further.
ggplot(nrsi_data, aes(x=nrsi, y=mean_chl, col = protection)) +
geom_point() +
facet_grid(~protection) +
geom_smooth(method = "lm")There is a significant difference between chla values and protection levels, particularly for Fully Protected areas.
anova_result <- aov(mean_chl ~ protection, data = nrsi_data)
summary(anova_result) Df Sum Sq Mean Sq F value Pr(>F)
protection 3 20.42 6.807 69.63 <2e-16 ***
Residuals 832 81.34 0.098
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Perform Tukey's HSD test
tukey_results <- TukeyHSD(anova_result)
# View results
print(tukey_results) Tukey multiple comparisons of means
95% family-wise confidence level
Fit: aov(formula = mean_chl ~ protection, data = nrsi_data)
$protection
diff lwr upr p adj
Not Protected-Lightly Protected -0.1739941 -0.24062308 -0.10736515 0.0000000
Fully Protected-Lightly Protected -0.4372828 -0.51718334 -0.35738224 0.0000000
Fishing Refuge-Lightly Protected -0.0484950 -0.15714918 0.06015917 0.6592342
Fully Protected-Not Protected -0.2632887 -0.33893439 -0.18764295 0.0000000
Fishing Refuge-Not Protected 0.1254991 0.01993441 0.23106382 0.0122038
Fishing Refuge-Fully Protected 0.3887878 0.27438242 0.50319314 0.0000000
# Summary statistics for mean_chl by protection level
summary_stats <- nrsi_data %>%
group_by(protection) %>%
summarise(
mean_mean_chl = mean(mean_chl, na.rm = TRUE),
median_mean_chl = median(mean_chl, na.rm = TRUE),
sd_mean_chl = sd(mean_chl, na.rm = TRUE),
n = n()
)
# Display summary statistics
summary_stats# A tibble: 4 × 5
protection mean_mean_chl median_mean_chl sd_mean_chl n
<fct> <dbl> <dbl> <dbl> <int>
1 Lightly Protected 0.904 0.802 0.363 254
2 Not Protected 0.730 0.671 0.353 343
3 Fully Protected 0.467 0.496 0.126 169
4 Fishing Refuge 0.856 0.865 0.192 70
This suggest that the significant effect of chla is mostly due to the spatial position of fully protected areas and not necessarily to the influence of chla to the NRSI.